import pandas as pd
from pandas.plotting import autocorrelation_plot
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
df = pd.read_csv('../data/Load_history.csv')
weather = pd.read_csv('../data/temperature_history.csv')
hourColumn = [f'h{i+1}' for i in range(24)]
df["date"] = pd.to_datetime(df[['year', 'month', 'day']])
def getValue(x):
v = x[hourColumn].sum() / 24
if v == 0:
return np.nan
return v
df["average"] = df.apply(lambda x: getValue(x) , axis=1)
zones = len(df['zone_id'].unique())
def getGlobalTrend():
plt.close()
fig, ax = plt.subplots(20, figsize=(20,65))
for i in range(20):
data = df[df['zone_id']==i+1]#.sort_values(by="date")
ax[i].set_title(f'Zone {i+1}')
for j in range(2003, 2009):
d = data[data['year'] == j]
ax[i].plot(d['date'], d['average'], label=j, linewidth=0.75)
#plt.savefig('YealyChange.png')
plt.show()
getGlobalTrend()
df = pd.read_csv('aggr.csv') # each zone
df['time'] = pd.to_datetime(df['time'])
total = df.groupby(['time','t1', 't2', 't3', 't4', 't5', 't6', 't7', 't8',
't9', 't10', 't11']).sum().reset_index() # zone 21
total['time'] = pd.to_datetime(total['time'])
null_data = df[df.isnull().any(axis=1)]
null_data['time'].unique()
def getAutocorrelationPlot(df, zid, lags=400, name = 'value'):
# from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
data = df[df['zone_id']==zid]
plt.close()
fig, ax = plt.subplots(figsize=(30, 6))
df = data.resample(on='time', rule='D').mean()
title = f'ACF of Zone {zid} (Day)'
plot_acf(df[name].fillna(method="ffill"), lags=lags, ax=ax, title=title)
# plt.savefig(f'result/weather/acf_name_{zid}')
plt.show()
def getGlobalAutocorrelationPlot(df, lags=400, name='value'):
fig, ax = plt.subplots(figsize=(30, 6))
data = df.resample(on='time', rule='D').mean()
title = f'ACF of Zone 21 (Day)'
plot_acf(df[name].fillna(method="ffill"), lags=lags, ax=ax, title=title)
# plt.savefig(f'result/weather/acf_{name}')
plt.show()
getGlobalAutocorrelationPlot(total)
for zid in range(1, 21):
getAutocorrelationPlot(df, zid)
def getPartialAutocorrelationPlot(df, zid, name = 'value'):
data = df[df['zone_id']==zid]
plt.close()
fig, ax = plt.subplots(1,2,figsize=(20, 4))
df = data.resample(on='time', rule='D').mean()
title = f'PACF of Zone {zid}'
plot_pacf(df[name].fillna(method="ffill"), lags=400, ax=ax[1], title=title + ' (Day)')
plot_pacf(data[name].fillna(method="ffill"), lags=120, ax=ax[0], title=title + '(Hour)')
# plt.savefig(f'result/autocorrelation/pacf_name_{zid}')
plt.show()
def getPartialGlobalAutocorrelationPlot(df, lags=400, name='value'):
plt.close()
fig, ax = plt.subplots(1,2, figsize=(20, 4))
data = df.resample(on='time', rule='D').mean()
title = f'PACF of Zone 21'
plot_pacf(df[name].fillna(method="ffill"), lags=400, ax=ax[1], title=title + ' (Day)')
plot_pacf(data[name].fillna(method="ffill"), lags=120, ax=ax[0], title=title + ' (Hour)')
# plt.savefig(f'result/autocorrelation/pacf_21')
plt.show()
getPartialGlobalAutocorrelationPlot(total)
for zid in range(1, 21):
getPartialAutocorrelationPlot(df, zid)
def getstr(x):
return x.get_string()
total['day'] = total['time'].apply(lambda x:x.strftime("%HH"))
total['month'] = total['time'].apply(lambda x: x.strftime('%m'))
total['year'] = total['time'].apply(lambda x:x.strftime('%y'))
plt.close()
fig, ax = plt.subplots(figsize=(120, 8))
g = sns.boxplot(x='month', y='value', data=total, ax=ax, hue='day')
ax.get_legend().remove()
plt.show()
plt.close()
fig, ax = plt.subplots(figsize=(10, 5))
g = sns.boxplot(x='day', y='value', data=total, ax=ax)
plt.title('Hourly Distribution')
plt.show()
plt.close()
fig, ax = plt.subplots(figsize=(8, 5))
g = sns.boxplot(x='month', y='value', data=total, ax=ax)
plt.title('Monthly Distribution')
plt.show()
plt.close()
fig, ax = plt.subplots(figsize=(15, 7))
g = sns.boxplot(x='zone_id', y='value', data=df, ax=ax)
plt.title('Zone Distribution')
plt.show()
df['day'] = df['time'].apply(lambda x:x.strftime("%HH"))
df['month'] = df['time'].apply(lambda x: x.strftime('%m'))
df['year'] = df['time'].apply(lambda x:x.strftime('%y'))
for zid in range(1, 21):
plt.close()
fig, ax = plt.subplots(figsize=(120, 8))
g = sns.boxplot(x='month', y='value', data=df[df['zone_id']==zid], ax=ax, hue='day')
ax.get_legend().remove()
plt.title(f'Zone {zid}')
plt.show()